Code
rm(list = ls())
# Carregando pacotes
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(tidymodels)── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
✔ broom 1.0.6 ✔ rsample 1.2.1
✔ dials 1.3.0 ✔ tune 1.2.1
✔ infer 1.0.7 ✔ workflows 1.1.4
✔ modeldata 1.4.0 ✔ workflowsets 1.1.0
✔ parsnip 1.2.1 ✔ yardstick 1.3.1
✔ recipes 1.1.0
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter() masks stats::filter()
✖ recipes::fixed() masks stringr::fixed()
✖ dplyr::lag() masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step() masks stats::step()
• Use suppressPackageStartupMessages() to eliminate package startup messages
Code
library(tibble)
library(purrr)
library(ggplot2)
library(patchwork)
library(workflowsets)
library(yardstick)
library(glmnet)Carregando pacotes exigidos: Matrix
Anexando pacote: 'Matrix'
Os seguintes objetos são mascarados por 'package:tidyr':
expand, pack, unpack
Loaded glmnet 4.1-8
Code
# Prefer tidymodels functions when names conflict with other attached packages
# (e.g. dplyr::filter over stats::filter, recipes::step over stats::step)
tidymodels::tidymodels_prefer()
# Fix the RNG seed so the simulated data set is reproducible
set.seed(0)
# Simulate a regression data set with 5 informative and 15 sparse predictors.
#
# Each of the `n` rows follows
#   y = 7*x1 - 5*x2 + 2*x3 + 4*x4 + 9*x5 + eps,
# with x_j ~ N(0, 1) and eps ~ N(0, 0.5). Columns x6..x20 are identically
# zero, so the penalized models (ridge/lasso) should shrink them away.
#
# Args:
#   n: number of observations to simulate (integer, default 5000L).
# Returns:
#   A tibble with columns y, x1, ..., x20.
#
# Fix vs. the original: the old row-wise helper drew rnorm(5000L) per row
# (the 5000 was hard-coded, ignoring `n`) but used only the first 5 values —
# roughly 25 million wasted draws for the default n — and ignored its loop
# index entirely. The simulation is now fully vectorized.
gerando_dados <- function(n = 5000L) {
  # n x 5 matrix of informative predictors, one row per observation
  x <- matrix(rnorm(n * 5L), nrow = n, ncol = 5L)
  # True coefficients of the data-generating process
  beta <- c(7, -5, 2, 4, 9)
  # Linear signal plus Gaussian noise (sd = 0.5), one independent draw per row
  y <- as.vector(x %*% beta) + rnorm(n, mean = 0, sd = 0.5)
  # 15 all-zero columns: pure noise features for the penalized models
  parte_esparsa <- matrix(0, nrow = n, ncol = 15L)
  dados <- cbind(y, x, parte_esparsa)
  colnames(dados) <- c("y", paste0("x", seq_len(20L)))
  tibble::as_tibble(dados)
}
dados <- gerando_dados()
# Hold-out: 80/20 train/test split, stratified on the response y so both
# partitions cover the same range of outcome values
dados_split <- rsample::initial_split(dados, prop = 0.8, strata = "y")
treino <- rsample::training(dados_split)
teste <- rsample::testing(dados_split)
# Model specifications (glmnet engine), regression mode, with the penalty
# (lambda) marked for tuning. mixture controls the elastic-net blend:
# mixture = 0 is pure ridge, mixture = 1 is pure lasso.
modelo_ridge <- parsnip::linear_reg(penalty = tune::tune(), mixture = 0) %>%
  parsnip::set_engine("glmnet") %>%
  parsnip::set_mode("regression")
modelo_lasso <- parsnip::linear_reg(penalty = tune::tune(), mixture = 1) %>%
  parsnip::set_engine("glmnet") %>%
  parsnip::set_mode("regression")
# Build a workflow set crossing the single preprocessor (formula y ~ .)
# with both model specs; the unnamed preproc list yields the workflow IDs
# "formula_ridge" and "formula_lasso" seen in the tuning log below
all_wf <-
  workflowsets::workflow_set(
    preproc = list(y ~ .),
    models = list(ridge = modelo_ridge, lasso = modelo_lasso),
    cross = TRUE
  )
# 10-fold cross-validation on the training set (re-seeded so the fold
# assignment is reproducible regardless of earlier RNG use)
set.seed(0)
cv <- rsample::vfold_cv(treino, v = 10L)
# Single tuning metric: root mean squared error
metrica <- yardstick::metric_set(rmse)
# Tune the penalty for both workflows over a 50-point grid, scoring RMSE
# on each of the 10 CV folds; `seed` makes the grid search reproducible
tunagem <-
  all_wf %>%
  workflowsets::workflow_map(
    seed = 0,
    verbose = TRUE,
    resamples = cv,
    grid = 50,
    metrics = metrica
  )i 1 of 2 tuning: formula_ridge
✔ 1 of 2 tuning: formula_ridge (2.4s)
i 2 of 2 tuning: formula_lasso
✔ 2 of 2 tuning: formula_lasso (1.8s)